{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.datasets import load_boston\n",
    "\n",
    "from sklearn.model_selection import train_test_split, cross_val_score\n",
    "from sklearn import metrics\n",
    "from sklearn.impute import SimpleImputer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-11.22504076,  -5.3945749 ,  -4.74755867, -22.54699078,\n",
       "       -12.31243335, -17.18030718,  -6.94019868, -94.14567212,\n",
       "       -28.541145  , -14.6250416 ])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "boston = load_boston()\n",
    "regressor = RandomForestRegressor(n_estimators=100, random_state=0)\n",
    "cross_val_score(regressor, boston.data, boston.target, cv=10, scoring=\"neg_mean_squared_error\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['accuracy',\n",
       " 'adjusted_mutual_info_score',\n",
       " 'adjusted_rand_score',\n",
       " 'average_precision',\n",
       " 'balanced_accuracy',\n",
       " 'completeness_score',\n",
       " 'explained_variance',\n",
       " 'f1',\n",
       " 'f1_macro',\n",
       " 'f1_micro',\n",
       " 'f1_samples',\n",
       " 'f1_weighted',\n",
       " 'fowlkes_mallows_score',\n",
       " 'homogeneity_score',\n",
       " 'jaccard',\n",
       " 'jaccard_macro',\n",
       " 'jaccard_micro',\n",
       " 'jaccard_samples',\n",
       " 'jaccard_weighted',\n",
       " 'max_error',\n",
       " 'mutual_info_score',\n",
       " 'neg_brier_score',\n",
       " 'neg_log_loss',\n",
       " 'neg_mean_absolute_error',\n",
       " 'neg_mean_gamma_deviance',\n",
       " 'neg_mean_poisson_deviance',\n",
       " 'neg_mean_squared_error',\n",
       " 'neg_mean_squared_log_error',\n",
       " 'neg_median_absolute_error',\n",
       " 'neg_root_mean_squared_error',\n",
       " 'normalized_mutual_info_score',\n",
       " 'precision',\n",
       " 'precision_macro',\n",
       " 'precision_micro',\n",
       " 'precision_samples',\n",
       " 'precision_weighted',\n",
       " 'r2',\n",
       " 'recall',\n",
       " 'recall_macro',\n",
       " 'recall_micro',\n",
       " 'recall_samples',\n",
       " 'recall_weighted',\n",
       " 'roc_auc',\n",
       " 'roc_auc_ovo',\n",
       " 'roc_auc_ovo_weighted',\n",
       " 'roc_auc_ovr',\n",
       " 'roc_auc_ovr_weighted',\n",
       " 'v_measure_score']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sorted(metrics.SCORERS.keys())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 使用随即森林填补缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = load_boston()\n",
    "dataset.data.shape\n",
    "#总共506*13=6578个数据\n",
    "X_full, y_full = dataset.data, dataset.target\n",
    "n_samples = X_full.shape[0]\n",
    "n_features = X_full.shape[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "添加缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "#首先确定我们希望放入的缺失数据的比例,在这里我们假设是50%,那总共就要有3289个数据缺失\n",
    "rng = np.random.RandomState(0)\n",
    "missing_rate = 0.5\n",
    "n_missing_samples = int(np.floor(n_samples * n_features * missing_rate))\n",
    "#np.floor向下取整,返回.0格式的浮点数\n",
    "\n",
    "#所有数据要随机遍布在数据集的各行各列当中,而一个缺失的数据会需要一个行索引和一个列索引\n",
    "#如果能够创造一个数组,包含3289个分布在0~506中间的行索引,和3289个分布在0~13之间的列索引,那我们就可\n",
    "#以利用索引来为数据中的任意3289个位置赋空值\n",
    "#然后我们用0,均值和随机森林来填写这些缺失值,然后查看回归的结果如何\n",
    "\n",
    "missing_features = rng.randint(0,n_features,n_missing_samples)\n",
    "missing_samples = rng.randint(0,n_samples,n_missing_samples)\n",
    "\n",
    "#missing_samples = rng.choice(dataset.data.shape[0],n_missing_samples,replace=False)\n",
    "#我们现在采样了3289个数据,远远超过我们的样本量506,所以我们使用随机抽取的函数randint。但如果我们需要\n",
    "#的数据量小于我们的样本量506,那我们可以采用np.random.choice来抽样,choice会随机抽取不重复的随机数,\n",
    "#因此可以帮助我们让数据更加分散,确保数据不会集中在一些行中\n",
    "\n",
    "X_missing = X_full.copy()\n",
    "y_missing = y_full.copy()\n",
    "\n",
    "X_missing[missing_samples,missing_features] = np.nan\n",
    "X_missing = pd.DataFrame(X_missing)\n",
    "#转换成DataFrame是为了后续方便各种操作,numpy对矩阵的运算速度快到拯救人生,但是在索引等功能上却不如pandas"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用0和均值填充"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "#使用均值进行填补\n",
    "from sklearn.impute import SimpleImputer\n",
    "imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n",
    "X_missing_mean = imp_mean.fit_transform(X_missing)\n",
    "\n",
    "#使用0进行填补\n",
    "imp_0 = SimpleImputer(missing_values=np.nan, strategy=\"constant\",fill_value=0)\n",
    "X_missing_0 = imp_0.fit_transform(X_missing)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用随即森林填充缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "使用随机森林回归填补缺失值\n",
    "任何回归都是从特征矩阵中学习,然后求解连续型标签y的过程,之所以能够实现这个过程,是因为回归算法认为,特征\n",
    "矩阵和标签之前存在着某种联系。实际上,标签和特征是可以相互转换的,比如说,在一个“用地区,环境,附近学校数\n",
    "量”预测“房价”的问题中,我们既可以用“地区”,“环境”,“附近学校数量”的数据来预测“房价”,也可以反过来,\n",
    "用“环境”,“附近学校数量”和“房价”来预测“地区”。而回归填补缺失值,正是利用了这种思想。\n",
    "\n",
    "对于一个有n个特征的数据来说,其中特征T有缺失值,我们就把特征T当作标签,其他的n-1个特征和原本的标签组成新\n",
    "的特征矩阵。那对于T来说,它没有缺失的部分,就是我们的Y_test,这部分数据既有标签也有特征,而它缺失的部分,只有特征没有标签,就是我们需要预测的部分。\n",
    "\n",
    "\n",
    "特征T不缺失的值对应的其他n-1个特征 + 本来的标签:X_train\n",
    "特征T不缺失的值:Y_train\n",
    "\n",
    "特征T缺失的值对应的其他n-1个特征 + 本来的标签:X_test\n",
    "特征T缺失的值:未知,我们需要预测的Y_test\n",
    "\n",
    "这种做法,对于某一个特征大量缺失,其他特征却很完整的情况,非常适用。\n",
    "\n",
    "那如果数据中除了特征T之外,其他特征也有缺失值怎么办?\n",
    "答案是遍历所有的特征,从缺失最少的开始进行填补(因为填补缺失最少的特征所需要的准确信息最少)。\n",
    "填补一个特征时,先将其他特征的缺失值用0代替,每完成一次回归预测,就将预测值放到原本的特征矩阵中,再继续填\n",
    "补下一个特征。每一次填补完毕,有缺失值的特征会减少一个,所以每次循环后,需要用0来填补的特征就越来越少。当\n",
    "进行到最后一个特征时(这个特征应该是所有特征中缺失值最多的),已经没有任何的其他特征需要用0来进行填补了,\n",
    "而我们已经使用回归为其他特征填补了大量有效信息,可以用来填补缺失最多的特征。\n",
    "遍历所有的特征后,数据就完整,不再有缺失值了。\n",
    "\"\"\"\n",
    "\n",
    "X_missing_reg = X_missing.copy()\n",
    "# 找出数据集中缺失值最多的从小到大的排序\n",
    "sortindex = np.argsort(X_missing_reg.isnull().sum(axis=0)).values\n",
    "\n",
    "for i in sortindex:\n",
    "    #构建我们的新特征矩阵和新标签\n",
    "    df = X_missing_reg\n",
    "    fillc = df.iloc[:,i]\n",
    "    df = pd.concat([df.iloc[:,df.columns != i],pd.DataFrame(y_full)],axis=1)\n",
    "    #在新特征矩阵中,对含有缺失值的列,进行0的填补\n",
    "    df_0 =SimpleImputer(missing_values=np.nan,strategy='constant',fill_value=0).fit_transform(df)\n",
    "    #找出我们的训练集和测试集\n",
    "    Ytrain = fillc[fillc.notnull()]\n",
    "    Ytest = fillc[fillc.isnull()]\n",
    "    Xtrain = df_0[Ytrain.index,:]\n",
    "    Xtest = df_0[Ytest.index,:]\n",
    "    #用随机森林回归来填补缺失值\n",
    "    rfc = RandomForestRegressor(n_estimators=100)\n",
    "    rfc = rfc.fit(Xtrain, Ytrain)\n",
    "    Ypredict = rfc.predict(Xtest)\n",
    "    #将填补好的特征返回到我们的原始的特征矩阵中\n",
    "    X_missing_reg.loc[X_missing_reg.iloc[:,i].isnull(),i] = Ypredict"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "对填补好的数据进行建模"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "#对所有数据进行建模,取得MSE结果\n",
    "X = [X_full,X_missing_mean,X_missing_0,X_missing_reg]\n",
    "mse = []\n",
    "std = []\n",
    "\n",
    "for x in X:\n",
    "    estimator = RandomForestRegressor(random_state=0, n_estimators=100)\n",
    "    scores = cross_val_score(estimator,x,y_full,scoring='neg_mean_squared_error',cv=5).mean()\n",
    "    mse.append(scores * -1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAx0AAAGDCAYAAABOan2JAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3dedglVXkv7N+jjYEAARNajyOtqNFIFBGJU5AkxsRZ0cQBjUSPYvRETWKM0cQonk85ajzBz6M2TijiGDUfTkf9joI4M8jggDEyOCEOiAIBE+E5f+x6ZfPaI/R6d9N939f1Xm/tqlVVT61d3df+7bWqu7o7AAAAo1xn0QUAAADbNqEDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6ADYzlTVwVX1kUXXsTFV1VV1q6u574eq6nFbuqbRquo1VfX3G9j+/Kp6y0rWBLAlCB0AV0NVnVNV917AeQ+pqk9uRvs104f3VUvruvuY7r7PFq7r4Kq6ePq5tKqumHt98ZY816bo7vt295tW+rzXVHc/ubtfmCRVdWBVfeuaHG967y+Z3ocfVNXbqmr3a3jMX7intqSqOqqq/qOqLpp+vlhVL66q3TbjGAv58wmsn9ABwDU2BZldunuXJPdN8p2l19M6FueO03twyyTXT/L8xZazSV7S3bsmWZ3kT5PcNcmnqmrnxZYFXF1CB8A1NI0+fKqq/mdVXVhVZ1XV3af136yq781P9Zm+yX1NVX10+ib3+Krac9r2C98iV9VxVfVfq+p2SV6T5G7TN9cXTtvvX1VfqKqfTOd7/lx5n5h+Xzjtc7floyVTrSdW1Y+n33dfdu4XTtd3UVV9pKr22Mz+uXFVvbuqvl9VZ1fV0+a2XbeqnlNVX5+Of3JV3Wxu93tX1deq6kdV9b+qqub6/JNV9bJp29lVdd/lfTZ3jpdN3/SfVVVPne/j5d+KL5/CVFV3rapPT+/taVV14Ny2Q6ZjXjTVcPA6rn/HafRnj+n131XVz6rqV6bX/72q/mlaPmp6vXOSDyW58dyI0Y2nQ16vqt48nfNLVbXfprwP3f2TJMcm+Y1l782xVXVBVf1bVT1xbtv+VXXSdF+dX1Uvnzat6566znRd5073+5trGpmYu6cfV1XfmN6H525izZd194lJHpTk1zILIKmqvarqY1X1w+l4x9Q0glNVRye5eZL3TfU9a1r/rqr67nSff6Kqbr8pNQBbhtABsGX8VpLTM/tg9NYkb09ylyS3SvKYJK+sqvlv/A9O8sIkeyQ5NckxGztBd38lyZOTfGYaQViaJnNJkj9JsnuS+yf5s6p6yLTtgOn37tM+n5k/ZlX9apIPJHnFVPvLk3ygqn5trtmjM/uwd4Mk10vyzI3VOnf86yR5X5LTktwkye8leUZV/cHU5C+TPCrJ/ZL8SpLHJ/n3uUM8ILN+vGOSP07yB3PbfivJVzPrw5ckef1SKFnmidNx7pRkvyQP34z6b5JZ//z3JL+a2bW/u6pWT8HgFUnuO30rf/fM3sur6O7LkpyY5F7TqgOSnJvkHnOvj1+2zyX5xRGj70ybH5TZ/bV7ZiHilZt4LddP8pAkn51b/bYk30py48z65UVV9XvTtiOSHNHdv5JkryTvnKs3ueo9dcj08zuZjajsso667pnk1zO7B55XsxC9Sbr7oiQfTfLbS5eT5MVT3bdLcrNMIzjd/dgk30jywKm+l0z7fCjJrTO7j0/JJvyZA7YcoQNgyzi7u9/Y3ZcneUdmH4IO6+6fdvdHkvxHZgFkyQe6+xPd/dMkz81s9OJmv3jYjevu47r7jO6+ortPz+yD5L02tt/k/km+1t1Hd/fPuvttSc5M8sC5Nm/s7n/t7ksz++C5z2aUd5ckq7v7sO7+j+4+K8lrkzxy2v5fk/xdd3+1Z07r7h/O7X94d1/Y3d9I8vFl5z63u1879fmbktwoyQ3XUcMfJ/mn7v5md1+Q2YfVTfWYJB/s7g9O/fvRJCdlFpKS5Ioke1fVTt19Xnd/aT3HOT7JvabRlTtkFlbuVVU7Tn10wmbU9MmpnsuTHJ1ZINuQU2o2KvaDzEYA1ibJdL/dM8nfTCMKpyZ5XZLHTvv9Z5JbVdUe3X1xd392HcdecnCSl3f3Wd19cZK/TfLIuupzHy/o7ku7+7TMQujG6l7uO5kFv3T3v3X3R6c/X9/PLCxv8J7v7jd090XTn7nnJ7ljbcZzIsA1I3QAbBnnzy1fmiTdvXzd/EjHN5cWpg9pF2T2re1mq6rfqqqPT9OXfpzZaMimToG6cWbfus87N7NRiSXfnVv+91z1OjZmz8ymCF249JPkObkyHNwsydc3sP+Gzv3zbd29NDqyrtpunLn+zi9e74bsmeSPltV/zyQ3mkYjHpFZf59XVR+oqtuu5zjHJzkwyb5JzsjsW/t7Zfaswr919w82o6blfbJjbfih7n2nUbEdk7w6yQlT2LlxkgumUYQl8+/9E5LcJsmZNZt294ANnGP5fXRuklW5agi8JvdRprouSJKqukFVvb2qvl1VP0nylmzgnp+m2B1es2l8P0lyzrRps6YKAlef0AGwGD8f1ZimXf1qZt/kXjKt/uW5tv9lbrnXcay3ZjbN5mbdvVtmz33UBtrP+05mH6zn3TzJtzey36b6ZmajQLvP/eza3feb277XFjrX+pyXuf7O7PrmXZL19/c3kxy9rP6du/vwJOnuD3f372c2ynJmZqM46/LpzKYWPTTJ8d395amO+2fZ1Ko5G3vvNkt3/2dmIxm3SLJ3ppGDqtp1rtnP3/vu/lp3Pyqz6Uj/I8k/T1PK1lXX8vvo5kl+lquG8att+jNy71w5IvTiqY47TNO/HpMr7/mso8ZHJ3nwdIzdkqxZOvSWqA/YOKEDYDHuV1X3rKrrZfZsx+em6T/fz+xD32Omb2cfn6t+KD8/yU2n/Zbsmtk31pdV1f6ZfcBa8v3MpgDdcj11fDDJbarq0VW1qqoekdmDxu/fIleZfD7JT6rqb6pqp+ma9q6qu0zbX5fkhVV165q5w7LnSbaEdyZ5WlXddHqu4dnLtp+a2VSgHaaHsuef+XhLkgdW1R9Mte9Ys3/K9qZVdcOqetD0QfynSS5Ocvm6CphGYk5O8tRcGTI+neTQrD90nJ/k17bUFKCqum5mz+ZcmuSs7v7mVMOLp+u6Q2ajG8dM7R9TVau7+4okF06HuTzrvqfeluQvquoWU0B4UZJ3dPfPrmHNv1RVd07yL0l+lOSN06ZdM+vvC6fnbv562a7nL6tv18zeox9mFjBfdE3qAjaf0AGwGG9N8g+ZTRe5c2Zz4pc8MbMPUT9McvvMPhgu+ViSLyX5blUtTcl5SpLDquqiJM/LlQ/8Ln3Y/X8y++dGL6yqu84XMT0/8YAkfzWd71lJHrCZ033Wa3ru4IGZPYtxdmbPFbwus2+bk9lc/Hcm+UiSnyR5fZKdtsS557w2yYcze47glCTvWbb97zMLdj9K8oLM3pul+r+Z2Tfkz8nsw/Y3M3tvrjP9/FVm3/JfkNl0qadsoI7jk+yQWRBber1rrvzXoK6iu8/M7MP8WdN7d7Wm3yU5rWb/V8qPkjwuyUOnZ1uS2UP8a6ZreG+Sf5ieW0mSP0zypWnfI5I8cnr2Y1331Bsye77kE5m9z5cl+fOrWW+SPGu6ny9I8ubMAtvdpyltyex92jfJjzN70H/5e/riJH831ffM6RjnZhbov5yrPkwPrIDq3qKjtwBsRFUdleRb3f13i65le1RVazL7YLzDNf0mHoBNY6QDAAAYSugAAACGMr0KAAAYykgHAAAwlNABAAAMtaH/wZRtxB577NFr1qxZdBkAAGzjTj755B909+rl64WO7cCaNWty0kknLboMAAC2cVV17rrWm14FAAAMJXQAAABDCR0AAMBQQgcAADCU0AEAAAwldAAAAEMJHQAAwFBCBwAAMJTQAQAADCV0AAAAQwkdAADAUEIHAAAw1KpFF8AKuOTc5POHLrqKq9p/7aIrAABghRjpAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgqI2Gjqq6vKpOraovVtX7qmr3lShslKo6pKpeOfgc+1TV/Ta3XVU9qKqePbI2AABYaZsy0nFpd+/T3XsnuSDJU6/pSavqutf0GJtxrlUrda45+yTZaOhY3q67j+3uw4dVBQAAC7C506s+k+QmSVJVe1XV/66qk6vqhKq67dz6z1bViVV1WFVdPK0/sKo+XlVvTXJGVV23ql46tTu9qg6d2t2oqj4xN7ry21Pbo6bXZ1TVX0xt95nOdXpVvbeqrj+tP66qXlRVxyd5+vouZjrmq6e6zqqqe1XVG6rqK1V11Fy7i6vqH6vqlKr6P1W1eu48+03Le1TVOVV1vSSHJXnEdA2PqKr9q+rTVfWF6fevr6fdz0dhqmrP6VynT79vPlfzK6bjnFVVD9/M9xAAAFbUJoeOaXTi95IcO606Msmfd/edkzwzyaum9UckOaK775LkO8sOs3+S53b3byR5QpIfT+3ukuSJVXWLJI9O8uHu3ifJHZOcmtmIwE26e+/u/s0kb5yO9+Ykf9Pdd0hyRpJ/mDvX7t19r+7+x41c2vWT/G6Sv0jyviT/M8ntk/xmVe0ztdk5ySndvW+S45ed5yq6+z+SPC/JO6YRonckOTPJAd19p2nbi9bTbt4rk7x5urZjkrxibtuNktwzyQOSGBkBAGCrtilTj3aqqlOTrElycpKPVtUuSe6e5F1VtdTul6bfd0vykGn5rUleNnesz3f32dPyfZLcYe6b+t2S3DrJiUneUFU7JPmX7j61qs5Kcsuq+n+TfCDJR6pqt8yCxfHT/m9K8q65cy3/EL8+7+vurqozkpzf3WckSVV9abrmU5NcMXe8tyR5zyYee8luSd5UVbdO0kl22IR97pbkoGn56CQvmdv2L919RZIvV9UN17VzVT0pyZOS5Ob/ZZfNLBcAALacTX6mI8meSa6X2TMd10ly4fQN/dLP7TbhWJfMLVdmIyVL+9+iuz/S3Z9IckCSbyc5uqr+pLt/lNmox3HT+V+3mefakJ9Ov6+YW156vb5Q1tPvn+XKPtxxA+d4YZKPT8/FPHAjbden55bn66zlDZOku4/s7v26e7/Vu1+d0wEAwJaxydOruvvHSZ6W2VSqS5OcXVV/lCQ1c8ep6WeTPGxafuQGDvnhJH82jWikqm5TVTtX1Z5Jvtfdr03y+iT7VtUeSa7T3e9O8vdJ9p3q+VFV/fZ0vMdmNvVphOskWRqReXSST07L5yS587Q8/2zFRUl2nXu9W2YhKkkO2UC7eZ/Olf138Nw5AQDgWmWzHiTv7i8kOS2zD8MHJ3lCVZ2W5EtJHjw1e0aSv6yqz2f27MGP13O41yX5cpJTquqLSdZmNrJwYJJTq+oLmYWXIzJ7eP24aZrXUUn+djrG45K8tKpOz+y5j8M253o2wyVJbl9VJ2f2/MfSeV6WWXD6dJI95tp/PMlvLD0gntnUqBdX1aeSXHcD7eY9LcmfTtf22GzggXgAANiaVXdvvNXmHLDqlzObktVV9cgkj+ruB29sv61ZVV3c3dfaByP2u93qPulNB2284Uraf+2iKwAAYAurqpO7e7/l60f8HxZ3TvLKmj1hfmGSxw84BwAAcC2xxUNHd5+Q2UPf24xr8ygHAAAs2ub+54AAAACbRegAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgqFWLLoAVsPOeyf5rF10FAADbKSMdAADAUEIHAAAwlNABAAAMJXQAAABDCR0AAMBQQgcAADCU0AEAAAwldAAAAEMJHQAAwFBCBwAAMJTQAQAADCV0AAAAQwkdAADAUEIHAAAw1KpFF8B4556bHHrooqsAAFiMtWsXXQFGOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGGphoaOquqqOnnu9qqq+X1XvH3zeo6rq4YPP8ZCq+o3NbVdVh1XVvUfWBgAAK22RIx2XJNm7qnaaXv9+km8vsJ4t6SFJNho6lrfr7ud19/8/rCoAAFiARU+v+lCS+0/Lj0rytqUNVbVzVb2hqk6sqi9U1YOn9Wuq6oSqOmX6ufu0/sCqOq6q/rmqzqyqY6qqNnTyqjqnql5UVZ+pqpOqat+q+nBVfb2qnjx33E9U1Xur6stV9Zqqus607eK5Yz18GkW5e5IHJXlpVZ1aVXtV1ROn6zitqt5dVb+8nnY/H4Wpqt+brvuMqR9+aa7mF0zXfkZV3XaLvBMAADDIokPH25M8sqp2THKHJJ+b2/bcJB/r7rsk+Z3MPpzvnOR7SX6/u/dN8ogkr5jb505JnpHZ6MEtk9xjE2r4ZnffLckJSY5K8vAkd01y2Fyb/ZP8VZLfTLJXkoPWd7Du/nSSY5P8dXfv091fT/Ke7r5Ld98xyVeSPGE97ZIkU38cleQR3f2bSVYl+bO50/xguv5XJ3nmJlwjAAAszEJDR3efnmRNZqMcH1y2+T5Jnl1VpyY5LsmOSW6eZIckr62qM5K8K1edxvT57v5Wd1+R5NTp2Btz7PT7jCSf6+6Luvv7SS6rqt3njntWd1+e2WjMPTfrQmfTyE6Yaj44ye030v7Xk5zd3f86vX5TkgPmtr9n+n1y1nONVfWkafTmpMsu+/5mlgsAAFvOqkUXkNmH/pclOTDJr82tryQP6+6vzjeuqucnOT/JHTMLTZfNbf7p3PLl2bTrW9rnimX7XzG3fy/bp9exfscNnOOoJA/p7tOq6pDMrnVDNjgtLFfWud5r7O4jkxyZJKtX77e8fgAAWDGLnl6VJG9Iclh3n7Fs/YeT/PnScxlVdadp/W5JzptGMx6b5LorUOP+VXWL6VmORyT55LT+/Kq63bT+oXPtL0qy69zrXZOcV1U7ZDbSsb52S85MsqaqbjW9fmyS47fAdQAAwIpbeOiYpkMdsY5NL8xsKtXpVfXF6XWSvCrJ46rqs0luk9m/gjXaZ5IcnuSLSc5O8t5p/bOTvD/Jx5KcN9f+7Un+enoQfK8kf5/Z8yofzSxQrK9dkqS7L0vyp0neNU3JuiLJa0ZcGAAAjFbdZt5sSFUdmOSZ3f2ARddyda1evV8fdNBJiy4DAGAh1q5ddAXbj6o6ubv3W75+4SMdAADAtm1reJB8q9bdx2X2r2cBAABXg5EOAABgKKEDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoVYtugDG23PPZO3aRVcBAMD2ykgHAAAwlNABAAAMJXQAAABDCR0AAMBQQgcAADCU0AEAAAwldAAAAEMJHQAAwFBCBwAAMJTQAQAADCV0AAAAQwkdAADAUEIHAAAwlNABAAAMJXQAAABDrVp0AYx37o/PzaHvO3TRZQCwnVr7wLWLLgFYMCMdAADAUEIHAAAwlNABAAAMJXQAAABDCR0AAMBQQgcAADCU0AEAAAwldAAAAEMJHQAAwFBCBwAAMJTQAQAADCV0AAAAQwkdAADAUEIHAAAwlNABAAAMJXQAAABDCR0AAMBQQgcAADCU0AEAAAwldAAAAEMJHQAAwFBCBwAAMJTQAQAADCV0AAAAQwkdAADAUEIHAAAw1FYTOqrqoVV16rKfK6rqvgPOdfGWPuY6zvGcq9Ouqj49piIAAFiMrSZ0dPd7u3ufpZ8kr0pyQpIPb8r+NbPVXE+STQody9t1990H1AIAAAuzNX1I/7mquk2S5yV5bHdfMa3766o6sapOr6oXTOvWVNVXqupVSU5JcrOqemlVfbGqzqiqR2zkPAdW1fFV9c6q+teqOryqDq6qz0/77zW1O6qqXlNVJ0ztHjCtP6SqXjl3vPdPxzw8yU7TaM0x07Z/qaqTq+pLVfWkad262l08/a51Xct0/OOq6p+r6syqOqaqasv1PgAAbFmrFl3AclW1Q5K3Jnlmd39jWnefJLdOsn+SSnJsVR2Q5BtJfj3Jn3b3U6rqYUn2SXLHJHskObGqPtHd523glHdMcrskFyQ5K8nrunv/qnp6kj9P8oyp3Zok90qyV5KPV9Wt1nfA7n52Vf23acRmyeO7+4Kq2mmq693rabfkoHVdy7TtTklun+Q7ST6V5B5JPrmBawQAgIXZGkc6XpjkS9399rl195l+vpDZiMZtMwshSXJud392Wr5nkrd19+XdfX6S45PcZSPnO7G7z+vunyb5epKPTOvPyCxoLHlnd1/R3V/LLJzcdjOv62lVdVqSzya52Vz967Oha/l8d39rGgU6dVmdSZKqelJVnVRVJ13248s2s1QAANhytqqRjqo6MMnDkuy7fFOSF3f32mXt1yS5ZFm7zfXTueUr5l5fkav2Ty/br5P8LFcNbjuu6wTTdd07yd26+9+r6rj1tZ3fbRNrvjzreB+7+8gkRybJ6luvXl47AACsmK1mpKOqrp/kjUn+pLsvWrb5w0keX1W7TG1vUlU3WMdhPpHkEVV13apaneSAJJ/fQiX+UVVdZ3rO45ZJvprknCT7TOtvltn0ryX/OU0VS5LdkvxoChy3TXLX9bRbqWsBAIAVszWNdDw5yQ2SvHrZc9Ev7u53VNXtknxm2nZxksdk9i3/vPcmuVuS0zIbiXhWd393C9X31cymON0wyZO7+7Kq+lSSszObivXFzKZ+LTkyyelVdUqSxyd5clWdPh3ns+tq190Hb+xaptACAADXGtVt5s3GVNVRSd7f3f+86FqujtW3Xt0HvfygRZcBwHZq7QPXbrwRsE2oqpO7e7/l67ea6VUAAMC2aWuaXrXV6u5DFl0DAABcWxnpAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGGrVogtgvD132zNrH7h20WUAALCdMtIBAAAMJXQAAABDCR0AAMBQQgcAADCU0AEAAAwldAAAAEMJHQAAwFBCBwAAMJTQAQAADCV0AAAAQwkdAADAUEIHAAAwlNABAAAMJXQAAABDCR0AAMBQqxZdACvg3HOTQw9ddBWLs3btoisAANiuGekAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGEjoAAIChhI6NqKrLq+rUuZ81G2h7SFW9clp+flU9cxOOf/FGtu9eVU/Z3LoBAGBrsWrRBVwLXNrd+yzw/LsneUqSVy2wBgAAuNqMdFwNVXVOVe0xLe9XVcdtxr63qKrPVNWJVfXCufW7VNX/qapTquqMqnrwtOnwJHtNoywv3UA7AADYKhnp2LidqurUafns7n7oNTzeEUle3d1vrqqnzq2/LMlDu/snU6D5bFUdm+TZSfZeGm2pqlXratfdfQ3rAgCAIYSOjdvS06vukeRh0/LRSf7HtFxJXlRVByS5IslNktxwHfuvr913r9Ko6klJnpQkN99lly1YPgAAbB6h4+r5Wa6cmrbj1dh/XaMSBydZneTO3f2fVXXOeo69Se26+8gkRybJfqtXGwUBAGBhPNNx9ZyT5M7T8sM20G5dPpXkkdPywXPrd0vyvSlI/E6SPaf1FyXZdRPaAQDAVknouHpekOSIqjohyeWbue/Tkzy1qk7MLEAsOSbJflV1UmZh5Mwk6e4fJvlUVX2xql66vnYAALC1Ks8fb/v2W726TzrooEWXsThr1y66AgCA7UJVndzd+y1fb6QDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGWrXoAlgBe+6ZrF276CoAANhOGekAAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGEroAAAAhhI6AACAoYQOAABgKKEDAAAYSugAAACGEjoAAIChqrsXXQODVdX3k5y76DomeyT5waKL2I7o75Wlv1eOvl5Z+ntl6e+Vpb+3rD27e/XylUIHK6qqTuru/RZdx/ZCf68s/b1y9PXK0t8rS3+vLP29MkyvAgAAhhI6AACAoYQOVtqRiy5gO6O/V5b+Xjn6emXp75Wlv1eW/l4BnukAAACGMtIBAAAMJXQwTFXdrKo+XlVfqaovVdXTp/W/WlUfraqvTb+vv+har+020NfPr6pvV9Wp08/9Fl3rtqCqdqyqz1fVaVN/v2Ba794eYES96igAAASWSURBVAP97f4epKquW1VfqKr3T6/d2wOto7/d24NU1TlVdcbUrydN69zfK8D0KoapqhsluVF3n1JVuyY5OclDkhyS5ILuPryqnp3k+t39Nwss9VpvA339x0ku7u6XLbTAbUxVVZKdu/viqtohySeTPD3JQXFvb3Eb6O8/jPt7iKr6yyT7JfmV7n5AVb0k7u1h1tHfz497e4iqOifJft39g7l17u8VYKSDYbr7vO4+ZVq+KMlXktwkyYOTvGlq9qbMPhxzDWygrxmgZy6eXu4w/XTc20NsoL8ZoKpumuT+SV43t9q9Pch6+puV5f5eAUIHK6Kq1iS5U5LPJblhd5+XzD4sJ7nB4irb9izr6yT5b1V1elW9wZDxljNNhzg1yfeSfLS73dsDrae/E/f3CP+U5FlJrphb594eZ139nbi3R+kkH6mqk6vqSdM69/cKEDoYrqp2SfLuJM/o7p8sup5t2Tr6+tVJ9kqyT5LzkvzjAsvbpnT35d29T5KbJtm/qvZedE3bsvX0t/t7C6uqByT5XnefvOhatgcb6G/39jj36O59k9w3yVOr6oBFF7S9EDoYapp//e4kx3T3e6bV50/PICw9i/C9RdW3LVlXX3f3+dOHtSuSvDbJ/ouscVvU3RcmOS6z5wvc24PN97f7e4h7JHnQNO/97Ul+t6reEvf2KOvsb/f2ON39nen395K8N7O+dX+vAKGDYaaHP1+f5Cvd/fK5Tccmedy0/Lgk/99K17atWV9fL/0lOnloki+udG3boqpaXVW7T8s7Jbl3kjPj3h5iff3t/t7yuvtvu/um3b0mySOTfKy7HxP39hDr62/39hhVtfP0j62kqnZOcp/M+tb9vQJWLboAtmn3SPLYJGdMc7GT5DlJDk/yzqp6QpJvJPmjBdW3LVlfXz+qqvbJbA7rOUkOXUx525wbJXlTVV03sy9v3tnd76+qz8S9PcL6+vto9/eK8ff2ynqJe3uIGyZ57+x7uqxK8tbu/t9VdWLc38P5J3MBAIChTK8CAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgDYrlRVV9XRc69XVdX3q+r90+sbVtX7q+q0qvpyVX1wWr+mqi6tqlPnfv5kUdcBcG3i/+kAYHtzSZK9q2qn7r40ye8n+fbc9sOSfLS7j0iSqrrD3Lavd/c+K1cqwLbBSAcA26MPJbn/tPyoJG+b23ajJN9aetHdp69gXQDbJKEDgO3R25M8sqp2THKHJJ+b2/a/kry+qj5eVc+tqhvPbdtr2fSq317JogGurUyvAmC7092nV9WazEY5Prhs24er6pZJ/jDJfZN8oar2njabXgVwNRjpAGB7dWySl+WqU6uSJN19QXe/tbsfm+TEJAesdHEA2xKhA4Dt1RuSHNbdZ8yvrKrfrapfnpZ3TbJXkm8soD6AbYbpVQBsl7r7W0mOWMemOyd5ZVX9LLMv517X3SdO07H2qqpT59q+obtfMbxYgGu56u5F1wAAAGzDTK8CAACGEjoAAIChhA4AAGAooQMAABhK6AAAAIYSOgAAgKGEDgAAYCihAwAAGOr/AkciR/JZARVfAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 864x432 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "x_labels = ['Full data',\n",
    "           'Zero Imputation',\n",
    "           'Mean Imputation',\n",
    "           'Regressor Imputation']\n",
    "colors = ['r', 'g', 'b', 'orange']\n",
    "\n",
    "plt.figure(figsize=(12, 6))\n",
    "ax = plt.subplot(111)\n",
    "for i in np.arange(len(mse)):\n",
    "    ax.barh(i, mse[i],color=colors[i], alpha=0.6, align='center')\n",
    "ax.set_title('Imputation Techniques with Boston Data')\n",
    "ax.set_xlim(left=np.min(mse) * 0.9,\n",
    "            right=np.max(mse) * 1.1)\n",
    "ax.set_yticks(np.arange(len(mse)))\n",
    "ax.set_xlabel('MSE')\n",
    "ax.set_yticklabels(x_labels)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
